package com.linghang.wusthelper.spider.jwc;

import com.linghang.wusthelper.dao.primary.StudentDao;
import com.linghang.wusthelper.entity.primary.Student;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Student-information spider.
 *
 * <p>Parses the {@code xjkpTable} table of the fetched page into a
 * {@link Student} and, when a student number is found, persists it through
 * {@link StudentDao#updateByStuNum}. After every call to
 * {@link #process(Page)} — successful or not — {@code notify()} is invoked on
 * the lock object handed to the constructor.
 *
 * @author origin
 */
public class StuInfoSpider implements PageProcessor {

    private static final Logger logger = LoggerFactory.getLogger(StuInfoSpider.class);

    private final StudentDao studentDao;

    /**
     * Monitor that is notified once a page has been processed.
     * NOTE(review): presumably the thread that launched the spider waits on
     * this same object — confirm against the caller.
     */
    private final String lock;

    private final Site site = Site.me()
            .setRetryTimes(3)
            .setTimeOut(3000)
            .addHeader("Connection", "keep-alive")
            .addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
            .addHeader("Accept-Encoding", "gzip,deflate,sdch")
            .addHeader("Accept-Language", "zh-CN,zh;q=0.8")
            .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36")
            .addHeader("Cache-Control", "no-cache")
            .addHeader("Host", "jwxt.wust.edu.cn")
            .setDomain("jwxt.wust.edu.cn");

    /**
     * Creates a spider whose requests carry the given session cookies.
     *
     * @param JSESSIONID value for the {@code JSESSIONID} cookie
     * @param SERVERID   value for the {@code SERVERID} cookie
     * @param studentDao DAO used to store the parsed student
     * @param lock       object that is notified after each processed page
     */
    public StuInfoSpider(String JSESSIONID, String SERVERID, StudentDao studentDao, String lock) {
        site.addCookie("JSESSIONID", JSESSIONID);
        site.addCookie("SERVERID", SERVERID);
        this.studentDao = studentDao;
        this.lock = lock;
    }

    /**
     * Extracts the student fields from the page and stores them.
     *
     * <p>Nothing is stored when no student number can be extracted. Cells
     * that are absent yield {@code null} fields instead of raising a
     * {@link NullPointerException}. The lock is notified in a {@code finally}
     * block so that a failure while parsing or saving cannot leave a waiting
     * thread blocked.
     *
     * @param page the fetched page
     */
    @Override
    public void process(Page page) {
        try {
            String stuNum = cellMatch(page, 3, 5, "学号：(.*)");
            if (stuNum == null) {
                logger.warn("No student number found on page {}; nothing stored", page.getUrl());
                return;
            }

            Student student = new Student();
            student.setStuNum(stuNum);
            student.setStuName(stripSpaces(cellText(page, 4, 2)));
            student.setCollege(cellMatch(page, 3, 1, "院系：(.*)"));
            student.setMarjor(cellMatch(page, 3, 2, "专业：(.*)"));
            student.setClasses(cellMatch(page, 3, 4, "班级：(.*)"));
            student.setBirthday(stripSpaces(cellText(page, 5, 2)));
            studentDao.updateByStuNum(student);
        } finally {
            // Always signal completion, even when parsing or the DAO call throws.
            synchronized (lock) {
                lock.notify();
            }
        }
    }

    @Override
    public Site getSite() {
        return site;
    }

    /** Builds the XPath selecting the text of cell ({@code row}, {@code col}) of {@code xjkpTable}. */
    private static String cellXpath(int row, int col) {
        return "//*[@id=\"xjkpTable\"]/tbody/tr[" + row + "]/td[" + col + "]/text()";
    }

    /** Returns the raw text of the given table cell, or {@code null} when the selector yields nothing. */
    private static String cellText(Page page, int row, int col) {
        return page.getHtml().xpath(cellXpath(row, col)).get();
    }

    /** Returns the result of applying {@code regex} to the given table cell, or {@code null} when there is none. */
    private static String cellMatch(Page page, int row, int col, String regex) {
        return page.getHtml().xpath(cellXpath(row, col)).regex(regex).get();
    }

    /** Removes every space character from {@code raw}; a {@code null} input is passed through. */
    private static String stripSpaces(String raw) {
        return raw == null ? null : raw.replaceAll(" ", "");
    }
}
